Biblioteki

# Install the two extra dependencies only when they are missing, so that
# re-running the document does not download and reinstall them every time.
if (!requireNamespace("EDAWR", quietly = TRUE)) {
  devtools::install_github("rstudio/EDAWR")
}
if (!requireNamespace("plotly", quietly = TRUE)) {
  # NOTE(review): consider switching this mirror URL to https.
  install.packages("plotly", repos = "http://cran.us.r-project.org")
}
# Show the code of every chunk in the rendered document, but hide warnings.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)

library(EDAWR)
library(dplyr)
library(ggplot2)
library(plotly)
library(tidyverse)

Wczytanie danych

# Working copy of `tb`. `tb` is not defined in this file; presumably it is
# the dataset shipped with the EDAWR package -- confirm.
mydf <- tb
# Render the first rows as a formatted table.
mydf %>%
  head() %>%
  knitr::kable()
country year sex child adult elderly
Afghanistan 1995 female NA NA NA
Afghanistan 1995 male NA NA NA
Afghanistan 1996 female NA NA NA
Afghanistan 1996 male NA NA NA
Afghanistan 1997 female 5 96 1
Afghanistan 1997 male 0 26 0

Podsumowanie danych w zbiorze

# Dimensions of the dataset: number of rows, then number of columns.
sprintf("Liczba wierszy: %d", nrow(mydf))
## [1] "Liczba wierszy: 3800"
sprintf("Liczba kolumn: %d", ncol(mydf))
## [1] "Liczba kolumn: 6"
# `good` flags the rows that have no missing value in any column;
# counting its TRUE entries gives the number of complete rows.
good <- complete.cases(mydf)
n_of_rows_without_NA <- sum(good)
sprintf("Liczba wierszy bez wartosci NA: %d", n_of_rows_without_NA)
## [1] "Liczba wierszy bez wartosci NA: 3380"
# Per-column summary statistics rendered as a formatted table.
mydf %>%
  summary() %>%
  knitr::kable()
country year sex child adult elderly
Length:3800 Min. :1995 Length:3800 Min. : 0.0 Min. : 0 Min. : 0.0
Class :character 1st Qu.:1999 Class :character 1st Qu.: 25.0 1st Qu.: 1128 1st Qu.: 84.5
Mode :character Median :2004 Mode :character Median : 76.0 Median : 2589 Median : 230.0
NA Mean :2004 NA Mean : 493.2 Mean : 10864 Mean : 1253.0
NA 3rd Qu.:2009 NA 3rd Qu.: 264.5 3rd Qu.: 6706 3rd Qu.: 640.0
NA Max. :2013 NA Max. :25661.0 Max. :731540 Max. :125991.0
NA NA NA NA’s :396 NA’s :413 NA’s :413

Liczba zachorowań z podziałem na płeć

# Total cases per record: adults + children + elderly. The result is NA
# whenever any of the three age-group counts is missing.
mydf <- mutate(mydf, sum = adult + child + elderly)

# Total number of cases per sex; records with a missing total are skipped.
sex_number_df <- mydf %>%
  group_by(sex) %>%
  summarize(sum = sum(sum, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
# Bar chart of the totals. Bar height is expressed in millions, while the
# label drawn inside each bar shows the raw total.
# The fill colour is mapped to the `sex` column of the plotted data itself
# (not to a vector taken from another data frame), so each colour is
# guaranteed to belong to the bar it is drawn on.
bar_plot <- ggplot(
  data = sex_number_df,
  aes(x = sex, y = sum / 1000000, fill = sex)
) +
  geom_col() +
  geom_text(aes(label = sum), vjust = 1.6, size = 3.5, color = "white") +
  labs(x = "Plec", y = "Suma zachorowan [mln]", fill = "Plec") +
  theme_minimal()

bar_plot

Wykres zachorowań w ciągu kolejnych lat

# Yearly totals for each age group; missing counts are skipped.
tbt <- mydf %>%
  group_by(year) %>%
  summarize(
    sum_child = sum(child, na.rm = TRUE),
    sum_adult = sum(adult, na.rm = TRUE),
    sum_elderly = sum(elderly, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# Reshape to long form: one row per (year, age group), with the group key
# in `age` and its total in `sum`. `tbt` already holds exactly the columns
# needed, so no column selection is required before reshaping.
temp <- tbt %>%
  gather(key = "age", value = "sum", -year)
head(temp)
## # A tibble: 6 x 3
##    year age         sum
##   <int> <chr>     <int>
## 1  1995 sum_child 14800
## 2  1996 sum_child 13928
## 3  1997 sum_child 16547
## 4  1998 sum_child 19544
## 5  1999 sum_child 21481
## 6  2000 sum_child 26773
# Earlier wide-format version of the plot, kept for reference:
#ggplot(data = tbt, aes(x = year)) +
 # geom_line(aes(y=sum_child), color="red") +
  #geom_line(aes(y=sum_adult), color="blue") +
  #geom_line(aes(y=sum_elderly), color="orange")

# One line (with points) per age group; the y axis is in millions.
# Breaks and labels are paired explicitly so that every legend entry is
# tied to its age-group key instead of depending on the default ordering
# of the keys.
ggplot(temp, aes(x = year, y = sum / 1000000, color = age)) +
  geom_line() +
  geom_point() +
  scale_color_discrete(
    name = "Wiek",
    breaks = c("sum_adult", "sum_child", "sum_elderly"),
    labels = c("Dorosli", "Dzieci", "Osoby starsze")
  ) +
  labs(x = "Rok", y = "Suma zachorowan [mln]", color = "Wiek") +
  theme_minimal()

Wykres zachorowań w ciągu kolejnych lat we wszystkich krajach

# Total cases per country and year. Missing per-record totals are skipped
# with na.rm = TRUE (as in the per-sex summary), so a year in which only
# some records are missing keeps its known counts instead of being plotted
# as zero. A country-year with no known totals still sums to 0, so no
# separate NA-to-zero replacement is needed afterwards.
tbt <- mydf %>%
  group_by(country, year) %>%
  summarize(sum = sum(sum, na.rm = TRUE))
## `summarise()` regrouping output by 'country' (override with `.groups` argument)

#tbt <- filter(tbt, country=="Poland")

# One line per country, y axis in millions. The legend is switched off both
# on the layer and in the theme.
plot <- ggplot(
  data = tbt,
  aes(x = year, y = sum / 1000000, group = country)
) +
  geom_line(aes(color = country), show.legend = FALSE) +
  labs(x = "Rok", y = "Suma zachorowan [mln]", color = "Kraj") +
  theme(legend.position = "none")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot)